در این پروژه رخدادهای امنیت پروازها از سال ۱۹۱۹ تا کنون را بررسی می کنیم.
توضیحات نحوه ی جمع آوری داده ها و پاکسازی آن در فایل report.pdf آمده است. همچنین در فاز پیشین تحلیل مکاشفه ای بر روی داده ها صورت گرفته و به سوالات ۱ و ۶ و ۷ پاسخ داده شده بود.
داده های ذخیره شده از طریق لینک زیر قابل دسترسی است:
https://github.com/Ajal88/DA_Project
هم چنین گزارش پروژه از طریق لینک زیر نیز قابل مشاهده است:
https://ajal88.github.io/DA_Project/Report_Phase2.html
کتابخانه های مورد نیاز و بارگذاری داده ها
library(readr)
library(dplyr)
library(stringr)
library(highcharter)
library(ggplot2)
library(stringr)
library(topicmodels)
library(tidytext)
casn <- readr::read_csv("Data/asn_c.csv") %>% as.data.frame(stringsAsFactors = F) %>%
mutate(Total_occupants = ifelse(Total_occupants == 0 & Total_fatalities != 0, Total_fatalities, Total_occupants),
Total_survivors = abs(Total_occupants - Total_fatalities)) %>%
mutate(Total_survivors = ifelse(Total_survivors > Total_occupants, Total_occupants, abs(Total_occupants - Total_fatalities))) %>%
mutate(is_army = str_detect(Operator, regex("Force|Navy",ignore_case = T))) %>%
mutate(occ_no = row_number())
۱. آنالیز متن و دسته بندی علت وقوع مشکلات پرواز ها
برای این منظور می خواهیم از مدل lda استفاده کنیم. به همین دلیل ابتدا از داده های اصلی تنها شماره ی تصادف و Narrative را انتخاب می کنیم. سپس علت سقوط را به کلمات آن تبدیل می کنیم و stopwords را از آن حذف می کنیم. سپس کلمات رایج در سقوط هواپیما همچون هواپیما و پرواز را از کلمات حذف می کنیم. در نهایت نیز تعداد تکرار هر لغت را برای هر Narrative بدست می آوریم. سپس از آنجایی که LDA با DocumentTermMatrix کار می کند، ساختار داده ی خود را به این صورت تغییر می دهیم. در نهایت نیز مدل خود را با ۲۰ topic لرن می کنیم. از آنجایی که لرن مدل وقت گیر است، مدل را برای استفاده ی آینده ذخیره می کنیم.
cause <- casn %>% select(occ_no, Narrative)
# split into words
cause_word <- cause %>%
unnest_tokens(output = word, input = Narrative)
# plane stopwords
word = c("aircraft", "airplane", "plane", "flight")
plane_stop_words = data_frame(word)
word_counts <- cause_word %>%
anti_join(stop_words) %>%
anti_join(plane_stop_words) %>%
count(occ_no, word, sort = TRUE)
flight_dtm <- word_counts %>%
cast_dtm(occ_no, word, n)accident_lda <- LDA(flight_dtm, k = 20, control = list(seed = 1234))
saveRDS(accident_lda, file="Data/lda.rds")accident_lda = readRDS(file="Data/lda.rds")سپس برای هر topic پنج کلمه ای که بیشترین احتمال حضور در این موضوع دارد را نمایش می دهیم.
accident_topics <- tidy(accident_lda, matrix = "beta")
top_terms <- accident_topics %>%
group_by(topic) %>%
top_n(5, beta) %>%
ungroup() %>%
arrange(topic, -beta)top_terms %>%
mutate(term = reorder(term, beta)) %>%
ggplot(aes(term, beta, fill = factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free", nrow = 5) +
coord_flip()top_terms_merge <- top_terms %>% group_by(topic) %>% summarise(words = paste(term, collapse=", "))
knitr::kable(top_terms_merge)| topic | words |
|---|---|
| 1 | landing, gear, main, collapsed, undercarriage |
| 2 | sea, missing, water, hit, hangar |
| 3 | runway, feet, short, left, rest |
| 4 | crash, terrain, lake, airstrip, pilot |
| 5 | engine, fuel, wing, left, takeoff |
| 6 | pilot, crew, test, cabin, system |
| 7 | hijacker, demanded, hijackers, passengers, 1 |
| 8 | runway, captain, feet, approach, pilot |
| 9 | damage, airport, sustained, substantial, accident |
| 10 | crashed, mountain, 2, km, antonov |
| 11 | accident, destroyed, killed, airport, crew |
| 12 | fire, caught, ground, destroyed, de |
| 13 | feet, departed, crew, reported, cleared |
| 14 | engine, takeoff, landing, forced, lost |
| 15 | damaged, repair, accident, reportedly, raf |
| 16 | airport, boeing, international, air, otter |
| 17 | dc, 3, struck, 4, douglas |
| 18 | en, route, night, operation, NA |
| 19 | air, force, transport, base, flying |
| 20 | approach, weather, pilot, conditions, visibility |
۲. رتبه بندی علت وقوع مشکلات برای پرواز ها
برای این بخش احتمال هر تاپیک برای هر سند را بدست می آوریم. (gamma) سپس تاپیکی که بیشترین احتمال را داراست به عنوان موضوع سند انتخاب می کنیم. سپس پروازها را بر اساس تاپیک دسته بندی کردی و تعداد رخدادهای امنیتی هر تاپیک را بدست می آوریم. نمودار علت سقوط پروازها بر اساس تعداد تکرار به صورت زیر است:
accidents_gamma <- tidy(accident_lda, matrix = "gamma")
accidents_data <- accidents_gamma %>% group_by(document) %>%
top_n(1, gamma) %>%
ungroup()
accident_summary <- accidents_data %>% group_by(topic) %>% summarise(count = n()) %>%
arrange(desc(count))
accident_summary <- accident_summary %>% inner_join(top_terms_merge, by = c("topic"))
occurance_sum = sum(accident_summary$count)
accident_summary <- accident_summary %>% mutate(count_percent = 100*count/occurance_sum)
knitr::kable(accident_summary %>% select(-count_percent))| topic | count | words |
|---|---|---|
| 10 | 2169 | crashed, mountain, 2, km, antonov |
| 15 | 1816 | damaged, repair, accident, reportedly, raf |
| 14 | 1672 | engine, takeoff, landing, forced, lost |
| 19 | 1296 | air, force, transport, base, flying |
| 9 | 1212 | damage, airport, sustained, substantial, accident |
| 7 | 1206 | hijacker, demanded, hijackers, passengers, 1 |
| 3 | 1184 | runway, feet, short, left, rest |
| 11 | 1122 | accident, destroyed, killed, airport, crew |
| 20 | 1024 | approach, weather, pilot, conditions, visibility |
| 2 | 986 | sea, missing, water, hit, hangar |
| 18 | 893 | en, route, night, operation, NA |
| 1 | 875 | landing, gear, main, collapsed, undercarriage |
| 13 | 862 | feet, departed, crew, reported, cleared |
| 12 | 741 | fire, caught, ground, destroyed, de |
| 5 | 673 | engine, fuel, wing, left, takeoff |
| 8 | 667 | runway, captain, feet, approach, pilot |
| 6 | 640 | pilot, crew, test, cabin, system |
| 4 | 636 | crash, terrain, lake, airstrip, pilot |
| 17 | 633 | dc, 3, struck, 4, douglas |
| 16 | 570 | airport, boeing, international, air, otter |
accident_summary %>% arrange(topic) %>%
hchart(type = "pie", hcaes(x = words ,y = count_percent)) %>%
hc_yAxis(title = list(text = "Count")) %>%
hc_xAxis(title = list(text = "Topic")) %>%
hc_title(text = "Airsafety Occurance Based on Topic", style = list(fontWeight = "bold")) %>%
hc_add_theme(hc_theme_538())
۳. آیا در صورت سقوط پرواز یک ایرلاین، دیگر نباید با آن پرواز کنیم؟
برای این سوال از دادههای حاوی میزان مسافت پرواز و ظرفیت هواپیما بهرهبردیم و شاخص ASK را برابر میزان صندلی دردسترس کلیومتر قرار دادیمایم وشاخص یک میلیون برابر را اعمال کردهایم. همانطور که از شکل و نتیجهی تست برای میزان مرگ و میر بدستآمده نمیتوان از میزان کشتههای یک شرکت هواپیمایی در سوانح هوایی آن پیش بینیای برای سالهای آیندهی آن انجامداد و نمیتوان دلیلی برای سفرنکردن با آن شرکت درنظر گرفت اما حوادث به صورت کلی رابطه دارد با میزان حوادثی که در گذشتهی آن شرکت رخ داده، واین رابطه رابطهایست مستقیم که از نتیجهای تست و نمودار نیز مشخص است و میتوان نتیجهگرفت که شرکتی که حوادث بیشتری را تجربهکردهاست در آینده نیز حوادث بیشتری را تجربهخواهد کرد و سفر با آن شرکت هواپیمایی ریسک بالاتری خواهدداشت. میتوان این اتفاق را به سیاستگذاری و مدیریت هواپیمایی مربوط دانست.
airline_safety = read_csv("Data/airline_safety.csv") %>%
mutate(death_ask_85_99 = (fatalities_85_99/avail_seat_km_per_week)*10^6,
death_ask_00_14 = (fatalities_00_14/avail_seat_km_per_week)*10^6)
ggplot(airline_safety, aes(x = death_ask_85_99, y = death_ask_00_14)) +
geom_point() +
geom_text(aes(label=ifelse(death_ask_00_14>0.4 | death_ask_85_99>0.4,as.character(airline),'')),
hjust=-0.1,
vjust=-0.1,
angle=10,
size=3) +
scale_x_continuous(name="Death in 1985 - 1999 ASK", limits=c(0, 1.2)) +
scale_y_continuous(name="Death in 2000 - 2014 ASK", limits=c(0, 1)) +
geom_smooth(method = lm, se = FALSE) +
ggtitle("Fatalities of Companies")cor.test(airline_safety$fatalities_85_99, airline_safety$fatalities_00_14)
Pearson's product-moment correlation
data: airline_safety$fatalities_85_99 and airline_safety$fatalities_00_14
t = 0.3456, df = 54, p-value = 0.731
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.2186217 0.3060989
sample estimates:
cor
0.04697895
ggplot(airline_safety, aes(x = incidents_85_99, y = incidents_00_14)) +
geom_point() +
geom_text(aes(label=ifelse(incidents_00_14>10 | incidents_85_99>20,as.character(airline),'')),
hjust=0,
vjust=0,
angle=10,
size=3) +
scale_x_continuous(name="Incidents in 1985 - 1999 ASK") +
scale_y_continuous(name="Incidents in 2000 - 2014 ASK") +
geom_smooth(method = lm, se = FALSE) +
ggtitle("Incidents of Companies")cor.test(airline_safety$incidents_85_99, airline_safety$incidents_00_14)
Pearson's product-moment correlation
data: airline_safety$incidents_85_99 and airline_safety$incidents_00_14
t = 3.2359, df = 54, p-value = 0.002073
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.1567121 0.6021146
sample estimates:
cor
0.4030088
۴. آیا واقعا سن هواپیما در میزان تلفات آن موثر است؟
برای بررسی این موضوع بایستی سن یک هواپیما و نرخ تلفات آن را محسابه کنیم و از آزمون فرض cor.test بهرهمیبریم تا رابطهی این دو متغیر را بیابیم.
همانطور که در نتیجهی این آزمون مشهود است کمتربودن سن یک هواپیما هیچ تاثیری در کمتربودن نرخ کشتهشدگان آن حادثهی هواپیمایی ندارد و این نسبت اندکی به سمت رابطهی عکس است اما دلیل محکمی بر وارونه بودن این فرض نیست، به طور کلی میتوان از نتیجه این برداشت را داشت که هواپیماهایی که از کیفیت مناسبی برخوردار نیستند، زودتر نابود شده و سنین بالا را نمیبینند و از این رو هواپیماهایی که سن زیادی دارند در اولین حادثهی منجر به تلفات کم نیز کاملا از خطوط هوایی خارج میشوند. این تفسیر و تحلیل این نتیجه را تایید میکند که منطقی نیز هست و دادههای کاملا آن را تصدیق میکنند.
age_death = read_csv("Data/asn_c.csv") %>% as.data.frame(stringsAsFactors = F) %>%
mutate(Total_occupants = ifelse(Total_occupants == 0 & Total_fatalities != 0, Total_fatalities, Total_occupants),
Total_survivors = abs(Total_occupants - Total_fatalities)) %>%
mutate(Total_survivors = ifelse(Total_survivors > Total_occupants, Total_occupants, abs(Total_occupants - Total_fatalities))) %>%
mutate(is_army = str_detect(Operator, regex("Force|Navy",ignore_case = T))) %>%
mutate(occ_no = row_number()) %>%
select(C.n.msn, Date, FirstFlight, Total_occupants, Total_fatalities, TotalAirframeHrs) %>%
na.omit() %>%
mutate(age = (Date - FirstFlight), death_rate = (Total_fatalities/Total_occupants)) %>%
group_by(C.n.msn) %>%
summarise(date = max(Date), first_flight = min(FirstFlight), age = max(age), total_airframe = max(TotalAirframeHrs), total_occupants = max(Total_occupants), total_fatalities = max(Total_fatalities)) %>%
mutate(death_rate = (total_fatalities/total_occupants)*100) %>%
filter(!is.nan(death_rate))
cor.test(age_death$age, age_death$death_rate)
Pearson's product-moment correlation
data: age_death$age and age_death$death_rate
t = -7.0953, df = 2760, p-value = 1.634e-12
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.17028842 -0.09702938
sample estimates:
cor
-0.1338417
۵. چه ویژگی هایی از ایرلاین در رخدادهای امنیتی آن تاثیر دارد؟(نظیر قدمت و …)
در این سوال قصد داریم به بررسی قدمت هواپیمایی، میانگین سن هواپیماهای دچار سانحهشده، میزان زمان پرواز، میانگین تعداد خدمه و تعداد حادثههای هر شرکت هواپیمایی و ارتباط آن با میزان نرخ کشندهبودن کلی و میانگین نرخ مرگ در سوانح آنها بپردازیم.
library(corrplot)
company_attr = read_csv("Data/asn_c.csv") %>% as.data.frame(stringsAsFactors = F) %>%
mutate(Total_occupants = ifelse(Total_occupants == 0 & Total_fatalities != 0, Total_fatalities, Total_occupants),
Total_survivors = abs(Total_occupants - Total_fatalities)) %>%
mutate(Total_survivors = ifelse(Total_survivors > Total_occupants, Total_occupants, abs(Total_occupants - Total_fatalities))) %>%
mutate(is_army = str_detect(Operator, regex("Force|Navy",ignore_case = T))) %>%
mutate(occ_no = row_number()) %>%
filter(is_army == FALSE) %>%
select(C.n.msn,
Date,
Operator,
FirstFlight,
Total_occupants,
Total_fatalities,
TotalAirframeHrs,
Crew_occupants) %>%
na.omit() %>%
mutate(airplane_age = (Date - FirstFlight),
death_rate = (Total_fatalities/Total_occupants)*100) %>%
filter(!is.nan(death_rate)) %>%
group_by(Operator) %>%
summarise(last_event = max(Date),
first_event = min(FirstFlight),
mean_age_plane = mean(airplane_age),
total_airframe = sum(TotalAirframeHrs),
mean_airframe = mean(TotalAirframeHrs),
total_occupants = sum(Total_occupants),
mean_occupants = mean(Total_occupants),
total_fatalities = sum(Total_fatalities),
mean_fatalities = mean(Total_fatalities),
total_crew = sum(Crew_occupants),
mean_crew = mean(Crew_occupants),
death_rate_avg = mean(death_rate),
event_count = n()) %>%
mutate(death_rate_company = (total_fatalities/total_occupants)*100,
company_age = last_event - first_event) %>%
filter(!is.nan(death_rate_company))# total death rate
cor.test(company_attr$company_age, company_attr$death_rate_company)
Pearson's product-moment correlation
data: company_attr$company_age and company_attr$death_rate_company
t = -5.0208, df = 1476, p-value = 5.772e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.17938723 -0.07911754
sample estimates:
cor
-0.1295836
cor.test(company_attr$mean_age_plane, company_attr$death_rate_company)
Pearson's product-moment correlation
data: company_attr$mean_age_plane and company_attr$death_rate_company
t = -4.6472, df = 1476, p-value = 3.665e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.17003347 -0.06952257
sample estimates:
cor
-0.1200857
همانطور که مشاهدهمیشود قدمت شرکت و میانگین سن هواپیماهای آن با نرخ مرگومیر کلی شرکت رابطهی عکس دارد که برای قدمت شرکت میتوان افزایش تجربهی آنها را عامل دانست و برای سن هواپیما نیز استدلالی همچون سوال قبل به کار برد.
cor.test(company_attr$total_airframe, company_attr$death_rate_company)
Pearson's product-moment correlation
data: company_attr$total_airframe and company_attr$death_rate_company
t = -1.3747, df = 1476, p-value = 0.1694
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.08659116 0.01525649
sample estimates:
cor
-0.03576019
cor.test(company_attr$mean_crew, company_attr$death_rate_company)
Pearson's product-moment correlation
data: company_attr$mean_crew and company_attr$death_rate_company
t = 1.4797, df = 1476, p-value = 0.1392
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.01252644 0.08930061
sample estimates:
cor
0.038487
cor.test(company_attr$event_count, company_attr$death_rate_company)
Pearson's product-moment correlation
data: company_attr$event_count and company_attr$death_rate_company
t = 0.059144, df = 1476, p-value = 0.9528
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.04945328 0.05252419
sample estimates:
cor
0.001539456
همانطور که مشاهده میشود میزان پرواز و میانگین تعداد خدمه و تعداد حوادث یک شرکت هواپیمایی ارتباطی با نرخ مرگومیر کلی آن ندارد.
# mean death rate per airplane
cor.test(company_attr$company_age, company_attr$death_rate_avg)
Pearson's product-moment correlation
data: company_attr$company_age and company_attr$death_rate_avg
t = -4.3333, df = 1476, p-value = 1.568e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.16214405 -0.06144413
sample estimates:
cor
-0.1120818
cor.test(company_attr$mean_age_plane, company_attr$death_rate_avg)
Pearson's product-moment correlation
data: company_attr$mean_age_plane and company_attr$death_rate_avg
t = -5.0192, df = 1476, p-value = 5.818e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.17934836 -0.07907763
sample estimates:
cor
-0.1295442
همانطور که مشاهدهمیشود قدمت شرکت و میانگین سن هواپیماهای آن با نرخ مرگومیر میانگین هر سانحهی آن شرکت رابطهی عکس دارد که برای قدمت شرکت میتوان افزایش تجربهی آنها را عامل دانست و برای سن هواپیما نیز استدلالی همچون سوال قبل به کار برد.
cor.test(company_attr$total_airframe, company_attr$death_rate_avg)
Pearson's product-moment correlation
data: company_attr$total_airframe and company_attr$death_rate_avg
t = -0.56442, df = 1476, p-value = 0.5726
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.06562941 0.03632635
sample estimates:
cor
-0.01468971
cor.test(company_attr$mean_crew, company_attr$death_rate_avg)
Pearson's product-moment correlation
data: company_attr$mean_crew and company_attr$death_rate_avg
t = 1.9059, df = 1476, p-value = 0.05686
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.001445471 0.100282549
sample estimates:
cor
0.04954704
cor.test(company_attr$event_count, company_attr$death_rate_avg)
Pearson's product-moment correlation
data: company_attr$event_count and company_attr$death_rate_avg
t = 0.56855, df = 1476, p-value = 0.5697
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.03621909 0.06573636
sample estimates:
cor
0.0147971
همانطور که مشاهده میشود میزان پرواز و میانگین تعداد خدمه و تعداد حوادث یک شرکت هواپیمایی ارتباطی با نرخ مرگومیر میانگین هر سانحهی آن شرکت ندارد.
num_com_attr = company_attr %>%
select(-Operator)
cor_num_com_attr = cor(num_com_attr)
corrplot(cor_num_com_attr, method = "square", type = "lower", tl.col = "black", tl.srt = 10)نمودار همبستگی متغیرهای موجود را در بالا مشاهده میکنید.
۶. بدترین خطوط هوایی، بدترین پروازها، بدترین هواپیماها
بدترین خطوط هوایی، بدترین هواپیماها و بدترین فرودگاه ها
ابتدا معیار بد بودن را انتخاب می کنیم، از آنجایی که شرکت هایی که تعداد پایینی پرواز و یا پروازهای کوچکی داشته باشند، در صورت سقوط دارای نرخ پایین زنده ماندن هستند اما در واقع حادثه بزرگی به شمار نمی آیند، تنها خطوط هوایی، هواپیماها و فرودگاه هایی را انتخاب می کنیم که بالای ۵۰۰ نفر مسافر داشته اند. سپس معیار بد بودن را نرخ پایین زنده ماندن در نظر میگیریم.
# worst airline
worst_airline <- casn %>% filter(!is.na(Operator)) %>%
filter(is_army == FALSE) %>%
group_by(Operator) %>%
summarise(Total_occupants = sum(Total_occupants), Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors), Survival_rate = 100*Total_survivors/Total_occupants) %>%
ungroup() %>%
filter(Total_occupants > 500) %>%
top_n(20, wt = desc(Survival_rate)) %>%
arrange(Survival_rate)
p = ggplot(data = worst_airline, mapping = aes(x = reorder(Operator, Survival_rate), y = Survival_rate, fill = Total_fatalities)) +
geom_bar(stat="identity") + scale_fill_gradient(low="brown1", high="brown4") +
ggtitle("Worst Airlines with lowest survival rate") +
xlab("Airline") +
ylab("Survival rate") + guides(color=guide_legend(title="fatality"), fill=guide_legend(title="fatality")) +
coord_flip()
p# worst airplane
worst_airplane <- casn %>% filter(!is.na(Type)) %>%
filter(is_army == FALSE) %>%
group_by(Type) %>%
summarise(Total_occupants = sum(Total_occupants), Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors), Survival_rate = 100*Total_survivors/Total_occupants) %>%
ungroup() %>%
filter(Total_occupants > 500) %>%
top_n(20, wt = desc(Survival_rate)) %>%
arrange(Survival_rate)
p = ggplot(data = worst_airplane, mapping = aes(x = reorder(Type, Survival_rate), y = Survival_rate, fill = Total_fatalities)) +
geom_bar(stat="identity") +
ggtitle("Worst Airplanes with lowest survival rate") +
xlab("Airplane") +
ylab("Survival rate") + guides(color=guide_legend(title="fatality"), fill=guide_legend(title="fatality")) +
coord_flip()
p# worst route
worst_departure_airport <- casn %>% filter(!is.na(DepartureAirport)) %>%
filter(is_army == FALSE) %>%
group_by(DepartureAirport) %>%
summarise(Total_occupants = sum(Total_occupants), Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors), Survival_rate = 100*Total_survivors/Total_occupants) %>%
ungroup() %>%
filter(Total_occupants > 500) %>%
top_n(20, wt = desc(Survival_rate)) %>%
arrange(Survival_rate)
p = ggplot(data = worst_departure_airport, mapping = aes(x = reorder(DepartureAirport, Survival_rate), y = Survival_rate, fill = Total_fatalities)) +
geom_bar(stat="identity") + scale_fill_gradient(low="midnightblue", high="darkred") +
ggtitle("Worst Departure Airports with lowest survival rate") +
xlab("Departure Airport") +
ylab("Survival rate") + guides(color=guide_legend(title="fatality"), fill=guide_legend(title="fatality")) +
coord_flip()
p
۷. سالانه چندین تصادف هوایی رخ می دهد؟ چند نفر سوار پرواز بوده اند؟ چند نفر جان سالم به در برده و چند نفر فوت کرده است؟
بررسی روند تلفات رخدادهای امنیتی پروازها در طول سالیان
برای این منظور ابتدا داده ها را بر اساس سال گروه بندی می کنیم، سپس تعداد تلفات، بازماندگان، افراد درگیر در حادثه و نرخ زنده ماندن را بدست می آوریم.(برای سال ۱۹۲۱ داده ی مناسبی به وجود نداشت به همین علت این سال از داده ها حذف شده است.)
# army and civil flights
year_fat <- casn %>% filter(!is.na(Date)) %>% group_by(Date) %>%
summarise(Total_occupants = sum(Total_occupants), Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors), Survival_rate = 100*Total_survivors/Total_occupants)
# remove bad data
year_fat <- year_fat[-c(3),]
highchart() %>%
hc_add_series(data = year_fat, type = "spline", hcaes(x = Date, y = Total_fatalities), name = "Total Fatalities") %>%
hc_add_series(data = year_fat, type = "spline", hcaes(x = Date, y = Total_survivors), name = "Total Survivors") %>%
hc_yAxis(title = list(text = "Count")) %>%
hc_xAxis(title = list(text = "Year")) %>%
hc_title(text = "Fatalities Per Year", style = list(fontWeight = "bold")) %>%
hc_add_theme(hc_theme_flat())همانطور که مشاهده می کنیم، تلفات حوادث در حال کاهش است. البته باید دقت داشته باشیم که این کاهش هم چنین نشانگر این است که استاندارد پرواز ها بالاتر رفته است. زیرا هر چه سال جلوتر می روند، تکنولوژی نیز پیشرفت کرده و تعداد مسافران هواپیماها افزایش یافته و استفاده از سفر هوایی بیشتر می شود. پس تعداد مسافرین بیشتر شده و تعداد کشتگان کمتر می شود که نشان دهنده ی بهبود وضعیت است.
year_fat %>%
hchart(type = "spline", hcaes(x = Date, y = Survival_rate), name = "Survival Rate") %>%
hc_yAxis(title = list(text = "Survival Rate")) %>%
hc_xAxis(title = list(text = "Year")) %>%
hc_title(text = "Survival Rate Per Year", style = list(fontWeight = "bold")) %>%
hc_add_theme(hc_theme_sandsignika())با توجه به نمودارهای بالا همانطور که انتظار داشتیم، نرخ زنده ماندن تقریبا به صورت خطی بیشتر شده است.
۸. در هر کدام از دسته های علت وقوع مشکلات پروازها، چه هواپیماهایی دچار مشکل شدند و چند کشته برجای گذاشته اند؟
ابتدا دسته بندی حاصل از lda را با داده ی پروازها ادغام می کنیم و موضوعات هر رخداد هوایی را بدست می آوریم. سپس میزان تلفات و نرخ فوت را نمایش می دهیم.
casn <- casn %>% mutate(occ_no = as.character(occ_no))
casn_topics <- casn %>% inner_join(accidents_data, by = c("occ_no" = "document"))
casn_topics_sum <- casn_topics %>% group_by(topic) %>%
summarise(Total_occupants = sum(Total_occupants),
Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors)) %>%
mutate(Survival_rate = 100*Total_survivors/Total_occupants, Fatality_rate = 100*Total_fatalities/Total_occupants) %>%
inner_join(top_terms_merge, by = c("topic"))
casn_topics_sum %>% arrange(desc(Fatality_rate)) %>%
hchart(type = "column", hcaes(x = words ,y = Fatality_rate, color = Total_occupants)) %>%
hc_yAxis(title = list(text = "Fatality Rate")) %>%
hc_xAxis(title = list(text = "Topic")) %>%
hc_title(text = "Safety Occurence Topics Fatality Rate", style = list(fontWeight = "bold")) %>%
hc_add_theme(hc_theme_google())casn_topics_yearly <- casn_topics %>% group_by(topic, Date) %>%
summarise(Total_occupants = sum(Total_occupants),
Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors)) %>%
mutate(Survival_rate = 100*Total_survivors/Total_occupants, Fatality_rate = 100*Total_fatalities/Total_occupants) %>%
inner_join(top_terms_merge, by = c("topic")) %>%
mutate(Survival_rate = ifelse(is.na(Survival_rate), 0, Survival_rate),
Fatality_rate = ifelse(is.na(Fatality_rate), 0, Fatality_rate))
casn_topics_yearly %>% arrange(Date) %>%
hchart("heatmap", hcaes(x = Date, y = words,value = Fatality_rate)) %>%
hc_title(text = "Safety Occurence Topics Fatality Rate in Years", style = list(fontWeight = "bold"))در این بخش نشان می دهیم که چه حوادثی برای هواپیماها بیشتر رخ می دهد. برای این منظور بر اساس نوع هواپیما و تاپیک گروه بندی کردی و تعداد و مجموع بازماندگان و جانباختگان را بدست می آوریم. سپس هواپیماهایی را انتخاب می کنیم که بیشتر از ۲۵۰ نفر کشته داشته باشند.
casn_topic_air <- casn_topics %>% group_by(Type, topic) %>%
summarise(Total_occupants = sum(Total_occupants),
Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors),
occ = n()) %>%
mutate(Survival_rate = 100*Total_survivors/Total_occupants, Fatality_rate = 100*Total_fatalities/Total_occupants) %>%
inner_join(top_terms_merge, by = c("topic")) %>%
mutate(Survival_rate = ifelse(is.na(Survival_rate), 0, Survival_rate),
Fatality_rate = ifelse(is.na(Fatality_rate), 0, Fatality_rate)) %>%
filter(Total_fatalities > 250) %>%
ungroup() %>%
group_by(Type) %>%
arrange(desc(occ)) %>%
slice(1) %>%
select(Aircraft = Type, Cause = words, occurance = occ, Fatality_rate, Total_occupants, Total_fatalities) %>%
arrange(desc(Total_fatalities))
knitr::kable(casn_topic_air)| Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|
| Boeing 747SR-46 | air, force, transport, base, flying | 1 | 99.23664 | 524 | 520 |
| Tupolev 154M | runway, captain, feet, approach, pilot | 5 | 76.47975 | 642 | 491 |
| Tupolev 154B-2 | runway, captain, feet, approach, pilot | 4 | 71.53846 | 650 | 465 |
| Antonov 24B | accident, destroyed, killed, airport, crew | 13 | 81.60470 | 511 | 417 |
| McDonnell Douglas DC-10-10 | engine, fuel, wing, left, takeoff | 4 | 40.89457 | 939 | 384 |
| Airbus A321-231 | accident, destroyed, killed, airport, crew | 2 | 100.00000 | 376 | 376 |
| Ilyushin 18V | accident, destroyed, killed, airport, crew | 7 | 78.82600 | 477 | 376 |
| Douglas DC-6 | feet, departed, crew, reported, cleared | 11 | 90.83969 | 393 | 357 |
| Boeing 747-121 | airport, boeing, international, air, otter | 2 | 45.80645 | 775 | 355 |
| Tupolev 104B | accident, destroyed, killed, airport, crew | 7 | 56.78808 | 604 | 343 |
| Tupolev 134A | en, route, night, operation, NA | 6 | 85.24173 | 393 | 335 |
| Boeing 747-237B | airport, boeing, international, air, otter | 3 | 33.23232 | 990 | 329 |
| Boeing 747-168B | feet, departed, crew, reported, cleared | 1 | 100.00000 | 312 | 312 |
| McDonnell Douglas DC-9-32 | feet, departed, crew, reported, cleared | 6 | 76.00000 | 400 | 304 |
| Lockheed L-1011 TriStar 200 | pilot, crew, test, cabin, system | 1 | 100.00000 | 301 | 301 |
| Boeing 777-2H6ER | feet, departed, crew, reported, cleared | 1 | 100.00000 | 298 | 298 |
| Airbus A300B2-203 | hijacker, demanded, hijackers, passengers, 1 | 4 | 58.46774 | 496 | 290 |
| Lisunov Li-2 | en, route, night, operation, NA | 45 | 64.94382 | 445 | 289 |
| Ilyushin 12 | en, route, night, operation, NA | 25 | 66.51270 | 433 | 288 |
| Lockheed C-130H Hercules | accident, destroyed, killed, airport, crew | 9 | 71.28463 | 397 | 283 |
| Douglas DC-6B | feet, departed, crew, reported, cleared | 9 | 87.85047 | 321 | 282 |
| Ilyushin 76MD | hijacker, demanded, hijackers, passengers, 1 | 2 | 100.00000 | 275 | 275 |
| Boeing 747-230B | feet, departed, crew, reported, cleared | 1 | 100.00000 | 269 | 269 |
| Airbus A300B4-622R | runway, captain, feet, approach, pilot | 1 | 97.41697 | 271 | 264 |
| Airbus A300B4-605R | runway, captain, feet, approach, pilot | 2 | 61.46572 | 423 | 260 |
| Boeing 747-121A | pilot, crew, test, cabin, system | 1 | 100.00000 | 259 | 259 |
| Ilyushin Il-76TD | air, force, transport, base, flying | 1 | 100.00000 | 257 | 257 |
| McDonnell Douglas DC-10-30 | feet, departed, crew, reported, cleared | 3 | 54.91453 | 468 | 257 |
| McDonnell Douglas DC-8-63CF | feet, departed, crew, reported, cleared | 1 | 100.00000 | 256 | 256 |
| Ilyushin 14P | en, route, night, operation, NA | 12 | 84.38538 | 301 | 254 |
در این بخش نشان می دهیم که برای هر تاپیک، چه هواپیماهایی بیشتر درگیر این رخداد می شوند. برای این منظور بر اساس تاپیک و نوع هواپیما گروه بندی کرده و تعداد رخداد و مجموع بازماندگان و جانباختگان را محاسبه می کنیم. در نهایت برای هر تاپیک ۵ هواپیمایی که بیشتر این تاپیک برایشان رخ داده است را نمایش می دهیم.
casn_topic_air <- casn_topics %>% group_by(topic, Type) %>%
summarise(Total_occupants = sum(Total_occupants),
Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors),
occ = n()) %>%
mutate(Survival_rate = 100*Total_survivors/Total_occupants, Fatality_rate = 100*Total_fatalities/Total_occupants) %>%
inner_join(top_terms_merge, by = c("topic")) %>%
mutate(Survival_rate = ifelse(is.na(Survival_rate), 0, Survival_rate),
Fatality_rate = ifelse(is.na(Fatality_rate), 0, Fatality_rate)) %>%
ungroup() %>%
filter(Survival_rate < 100) %>%
group_by(topic) %>%
arrange(desc(occ)) %>%
slice(1:5) %>%
select(Aircraft = Type, Cause = words, occurance = occ, Fatality_rate, Total_occupants, Total_fatalities) %>%
arrange(desc(Total_fatalities))
knitr::kable(casn_topic_air %>% filter(topic == 1))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 1 | Douglas C-47-DL (DC-3) | landing, gear, main, collapsed, undercarriage | 14 | 32.432432 | 37 | 12 |
| 1 | Consolidated PBY-5A Catalina | landing, gear, main, collapsed, undercarriage | 11 | 10.000000 | 40 | 4 |
| 1 | Consolidated PBY-5 Catalina | landing, gear, main, collapsed, undercarriage | 12 | 5.882353 | 17 | 1 |
| 1 | Douglas Dakota III (DC-3) | landing, gear, main, collapsed, undercarriage | 12 | 0.000000 | 0 | 0 |
| 1 | Vickers Valetta C.1 | landing, gear, main, collapsed, undercarriage | 12 | 0.000000 | 0 | 0 |
knitr::kable(casn_topic_air %>% filter(topic == 2))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 2 | Douglas C-47-DL (DC-3) | sea, missing, water, hit, hangar | 35 | 94.48819 | 127 | 120 |
| 2 | Douglas C-47A-90-DL (DC-3) | sea, missing, water, hit, hangar | 19 | 100.00000 | 36 | 36 |
| 2 | Douglas Dakota III (DC-3) | sea, missing, water, hit, hangar | 28 | 94.59459 | 37 | 35 |
| 2 | Consolidated PBY-5A Catalina | sea, missing, water, hit, hangar | 37 | 71.42857 | 35 | 25 |
| 2 | Consolidated PBY-5 Catalina | sea, missing, water, hit, hangar | 40 | 23.80952 | 21 | 5 |
knitr::kable(casn_topic_air %>% filter(topic == 3))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 3 | Fokker F-27 Friendship 600 | runway, feet, short, left, rest | 16 | 29.503916 | 383 | 113 |
| 3 | de Havilland Canada DHC-6 Twin Otter 300 | runway, feet, short, left, rest | 18 | 29.646018 | 226 | 67 |
| 3 | Yakovlev 40 | runway, feet, short, left, rest | 21 | 14.583333 | 384 | 56 |
| 3 | Avro 685 York C.1 | runway, feet, short, left, rest | 11 | 9.677419 | 93 | 9 |
| 3 | Douglas Dakota IV (DC-3) | runway, feet, short, left, rest | 11 | 0.000000 | 0 | 0 |
knitr::kable(casn_topic_air %>% filter(topic == 4))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 4 | Douglas C-47-DL (DC-3) | crash, terrain, lake, airstrip, pilot | 24 | 96.77419 | 124 | 120 |
| 4 | de Havilland Canada DHC-6 Twin Otter 300 | crash, terrain, lake, airstrip, pilot | 14 | 55.88235 | 136 | 76 |
| 4 | Douglas C-47A-30-DL (DC-3) | crash, terrain, lake, airstrip, pilot | 16 | 92.75362 | 69 | 64 |
| 4 | Cessna 208B Grand Caravan | crash, terrain, lake, airstrip, pilot | 11 | 39.47368 | 76 | 30 |
| 4 | Britten-Norman BN-2A-26 Islander | crash, terrain, lake, airstrip, pilot | 10 | 56.41026 | 39 | 22 |
knitr::kable(casn_topic_air %>% filter(topic == 5))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 5 | Curtiss C-46F-1-CU Commando | engine, fuel, wing, left, takeoff | 7 | 27.73109 | 119 | 33 |
| 5 | Douglas C-47-DL (DC-3) | engine, fuel, wing, left, takeoff | 11 | 26.89076 | 119 | 32 |
| 5 | Boeing KC-135A Stratotanker | engine, fuel, wing, left, takeoff | 8 | 100.00000 | 18 | 18 |
| 5 | Antonov 2R | engine, fuel, wing, left, takeoff | 6 | 28.57143 | 14 | 4 |
| 5 | Consolidated PBY-5A Catalina | engine, fuel, wing, left, takeoff | 11 | 11.11111 | 9 | 1 |
knitr::kable(casn_topic_air %>% filter(topic == 6))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 6 | McDonnell Douglas DC-9-32 | pilot, crew, test, cabin, system | 6 | 20.79395 | 529 | 110 |
| 6 | Beechcraft 200 Super King Air | pilot, crew, test, cabin, system | 8 | 66.66667 | 33 | 22 |
| 6 | de Havilland Canada DHC-6 Twin Otter 300 | pilot, crew, test, cabin, system | 7 | 41.46341 | 41 | 17 |
| 6 | Antonov 2 | pilot, crew, test, cabin, system | 5 | 28.12500 | 32 | 9 |
| 6 | Swearingen SA226-TC Metro II | pilot, crew, test, cabin, system | 8 | 13.15789 | 38 | 5 |
knitr::kable(casn_topic_air %>% filter(topic == 7))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 7 | Boeing 727 | hijacker, demanded, hijackers, passengers, 1 | 120 | 0.4461440 | 1569 | 7 |
| 7 | Douglas DC-3 | hijacker, demanded, hijackers, passengers, 1 | 27 | 2.1276596 | 235 | 5 |
| 7 | NA | hijacker, demanded, hijackers, passengers, 1 | 41 | 0.5540166 | 361 | 2 |
| 7 | McDonnell Douglas DC-9 | hijacker, demanded, hijackers, passengers, 1 | 32 | 0.3496503 | 572 | 2 |
| 7 | McDonnell Douglas DC-8 | hijacker, demanded, hijackers, passengers, 1 | 36 | 0.1322751 | 756 | 1 |
knitr::kable(casn_topic_air %>% filter(topic == 8))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 8 | Antonov 24RV | runway, captain, feet, approach, pilot | 6 | 43.63636 | 165 | 72 |
| 8 | Douglas C-47A-25-DK (DC-3) | runway, captain, feet, approach, pilot | 7 | 39.79592 | 98 | 39 |
| 8 | de Havilland Canada DHC-6 Twin Otter 300 | runway, captain, feet, approach, pilot | 11 | 41.77215 | 79 | 33 |
| 8 | Cessna 208B Grand Caravan | runway, captain, feet, approach, pilot | 7 | 51.35135 | 37 | 19 |
| 8 | Learjet 35A | runway, captain, feet, approach, pilot | 10 | 46.87500 | 32 | 15 |
knitr::kable(casn_topic_air %>% filter(topic == 9))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 9 | Beechcraft 200 Super King Air | damage, airport, sustained, substantial, accident | 46 | 55.10204 | 196 | 108 |
| 9 | Cessna 208B Grand Caravan | damage, airport, sustained, substantial, accident | 74 | 14.25703 | 498 | 71 |
| 9 | Beechcraft B200 Super King Air | damage, airport, sustained, substantial, accident | 25 | 46.95652 | 115 | 54 |
| 9 | Beechcraft A100 King Air | damage, airport, sustained, substantial, accident | 19 | 30.33708 | 89 | 27 |
| 9 | Consolidated C-87 Liberator Express | damage, airport, sustained, substantial, accident | 21 | 0.00000 | 0 | 0 |
knitr::kable(casn_topic_air %>% filter(topic == 10))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 10 | Antonov 2R | crashed, mountain, 2, km, antonov | 164 | 67.17557 | 262 | 176 |
| 10 | Antonov 2 | crashed, mountain, 2, km, antonov | 83 | 61.19403 | 268 | 164 |
| 10 | Douglas C-47-DL (DC-3) | crashed, mountain, 2, km, antonov | 50 | 72.60274 | 146 | 106 |
| 10 | Curtiss C-46D-10-CU Commando | crashed, mountain, 2, km, antonov | 62 | 95.23810 | 42 | 40 |
| 10 | Curtiss C-46D-5-CU Commando | crashed, mountain, 2, km, antonov | 57 | 100.00000 | 6 | 6 |
knitr::kable(casn_topic_air %>% filter(topic == 11))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 11 | Antonov 24B | accident, destroyed, killed, airport, crew | 13 | 81.60470 | 511 | 417 |
| 11 | Antonov 26 | accident, destroyed, killed, airport, crew | 12 | 77.77778 | 144 | 112 |
| 11 | Douglas C-47 (DC-3) | accident, destroyed, killed, airport, crew | 13 | 64.40678 | 118 | 76 |
| 11 | Douglas C-47-DL (DC-3) | accident, destroyed, killed, airport, crew | 20 | 56.70103 | 97 | 55 |
| 11 | Let L-410UVP | accident, destroyed, killed, airport, crew | 12 | 22.22222 | 72 | 16 |
knitr::kable(casn_topic_air %>% filter(topic == 12))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 12 | de Havilland DH.86 Express | fire, caught, ground, destroyed, de | 7 | 100.00000 | 27 | 27 |
| 12 | Consolidated PBY-5A Catalina | fire, caught, ground, destroyed, de | 13 | 38.59649 | 57 | 22 |
| 12 | Douglas C-47-DL (DC-3) | fire, caught, ground, destroyed, de | 15 | 41.02564 | 39 | 16 |
| 12 | Douglas C-47 (DC-3) | fire, caught, ground, destroyed, de | 8 | 45.83333 | 24 | 11 |
| 12 | Consolidated PBY-5 Catalina | fire, caught, ground, destroyed, de | 7 | 0.00000 | 0 | 0 |
knitr::kable(casn_topic_air %>% filter(topic == 13))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 13 | Douglas DC-6 | feet, departed, crew, reported, cleared | 11 | 90.83969 | 393 | 357 |
| 13 | de Havilland Canada DHC-6 Twin Otter 300 | feet, departed, crew, reported, cleared | 19 | 86.50000 | 200 | 173 |
| 13 | Douglas C-47-DL (DC-3) | feet, departed, crew, reported, cleared | 12 | 80.60606 | 165 | 133 |
| 13 | Douglas C-47A-80-DL (DC-3) | feet, departed, crew, reported, cleared | 10 | 82.78689 | 122 | 101 |
| 13 | Cessna 208B Grand Caravan | feet, departed, crew, reported, cleared | 12 | 75.00000 | 64 | 48 |
knitr::kable(casn_topic_air %>% filter(topic == 14))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 14 | Douglas C-47-DL (DC-3) | engine, takeoff, landing, forced, lost | 34 | 43.25397 | 252 | 109 |
| 14 | Douglas Dakota III (DC-3) | engine, takeoff, landing, forced, lost | 25 | 82.35294 | 34 | 28 |
| 14 | Vickers Valetta C.1 | engine, takeoff, landing, forced, lost | 31 | 26.58228 | 79 | 21 |
| 14 | Antonov 2R | engine, takeoff, landing, forced, lost | 30 | 20.00000 | 60 | 12 |
| 14 | Douglas Dakota IV (DC-3) | engine, takeoff, landing, forced, lost | 37 | 17.94872 | 39 | 7 |
knitr::kable(casn_topic_air %>% filter(topic == 15))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 15 | Douglas C-47-DL (DC-3) | damaged, repair, accident, reportedly, raf | 68 | 98.33333 | 60 | 59 |
| 15 | Douglas Dakota III (DC-3) | damaged, repair, accident, reportedly, raf | 45 | 64.55696 | 79 | 51 |
| 15 | Douglas Dakota IV (DC-3) | damaged, repair, accident, reportedly, raf | 36 | 77.77778 | 9 | 7 |
| 15 | Douglas C-47D (DC-3) | damaged, repair, accident, reportedly, raf | 40 | 0.00000 | 0 | 0 |
| 15 | Douglas C-47A-90-DL (DC-3) | damaged, repair, accident, reportedly, raf | 34 | 0.00000 | 0 | 0 |
knitr::kable(casn_topic_air %>% filter(topic == 16))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 16 | de Havilland Canada DHC-6 Twin Otter 300 | airport, boeing, international, air, otter | 45 | 25.15337 | 489 | 123 |
| 16 | de Havilland Canada DHC-6 Twin Otter 100 | airport, boeing, international, air, otter | 6 | 60.93750 | 64 | 39 |
| 16 | de Havilland Canada DHC-6 Twin Otter 200 | airport, boeing, international, air, otter | 13 | 38.70968 | 93 | 36 |
| 16 | Learjet 35A | airport, boeing, international, air, otter | 9 | 50.00000 | 42 | 21 |
| 16 | Beechcraft B200 Super King Air | airport, boeing, international, air, otter | 5 | 53.84615 | 26 | 14 |
knitr::kable(casn_topic_air %>% filter(topic == 17))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 17 | Douglas C-47-DL (DC-3) | dc, 3, struck, 4, douglas | 26 | 59.53079 | 341 | 203 |
| 17 | Douglas C-47A-90-DL (DC-3) | dc, 3, struck, 4, douglas | 14 | 82.91139 | 158 | 131 |
| 17 | Douglas C-47 (DC-3) | dc, 3, struck, 4, douglas | 13 | 80.34188 | 117 | 94 |
| 17 | Douglas DC-3 | dc, 3, struck, 4, douglas | 19 | 61.02941 | 136 | 83 |
| 17 | Douglas C-47A-25-DK (DC-3) | dc, 3, struck, 4, douglas | 12 | 92.00000 | 50 | 46 |
knitr::kable(casn_topic_air %>% filter(topic == 18))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 18 | Lisunov Li-2 | en, route, night, operation, NA | 45 | 64.94382 | 445 | 289 |
| 18 | Douglas C-47-DL (DC-3) | en, route, night, operation, NA | 73 | 70.58824 | 119 | 84 |
| 18 | Douglas C-47A-80-DL (DC-3) | en, route, night, operation, NA | 45 | 58.18182 | 110 | 64 |
| 18 | Douglas Dakota III (DC-3) | en, route, night, operation, NA | 35 | 45.94595 | 111 | 51 |
| 18 | Douglas C-47A-90-DL (DC-3) | en, route, night, operation, NA | 26 | 63.26531 | 49 | 31 |
knitr::kable(casn_topic_air %>% filter(topic == 19))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 19 | Junkers Ju-52/3m | air, force, transport, base, flying | 71 | 69.38776 | 147 | 102 |
| 19 | Consolidated PBY-5 Catalina | air, force, transport, base, flying | 56 | 72.82609 | 92 | 67 |
| 19 | Consolidated PBY-5A Catalina | air, force, transport, base, flying | 52 | 50.75758 | 132 | 67 |
| 19 | Douglas C-47 (DC-3) | air, force, transport, base, flying | 22 | 90.41096 | 73 | 66 |
| 19 | Douglas C-47-DL (DC-3) | air, force, transport, base, flying | 29 | 44.00000 | 125 | 55 |
knitr::kable(casn_topic_air %>% filter(topic == 20))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 20 | Douglas Dakota IV (DC-3) | approach, weather, pilot, conditions, visibility | 22 | 99.01478 | 203 | 201 |
| 20 | de Havilland Canada DHC-6 Twin Otter 300 | approach, weather, pilot, conditions, visibility | 18 | 63.39286 | 224 | 142 |
| 20 | Douglas Dakota III (DC-3) | approach, weather, pilot, conditions, visibility | 18 | 90.10989 | 91 | 82 |
| 20 | Antonov 2 | approach, weather, pilot, conditions, visibility | 14 | 60.78431 | 51 | 31 |
| 20 | Antonov 2R | approach, weather, pilot, conditions, visibility | 13 | 33.33333 | 12 | 4 |
۹. آیا وقوع رخدادهایی برای یک ایرلاین باعث بهبود روند آن می شود؟
برای بررسی این روند نمودار تعداد حوادث شرکتهای هواپیماییای که دادههای مناسبی را داشتند را مورد بررسی کردیم، شرکتّهای منتخب ما دارای تلفات قابل توجه و تعداد سالهای حادثهخیز بیشتر از ۲ است. همانطور که در نمودار قابل رویت است بیشتر این شرکتها بعد از رسیدن به یک نقطهی اوج از سوانح هوایی آن را رفته رفته تعدیل کردند و به وضوح روند خود را بهبود بخشیدهاند پس میتوان این گزاره را صحیح دانست.
library(highcharter)
library(ggplot2)
company_year = read_csv("Data/asn_c.csv") %>% as.data.frame(stringsAsFactors = F) %>%
mutate(Total_occupants = ifelse(Total_occupants == 0 & Total_fatalities != 0, Total_fatalities, Total_occupants),
Total_survivors = abs(Total_occupants - Total_fatalities)) %>%
mutate(is_army = str_detect(Operator, regex("Force|Navy",ignore_case = T))) %>%
filter(is_army == FALSE) %>%
select(C.n.msn,
year = Date,
Operator,
FirstFlight,
Total_occupants,
Total_fatalities,
TotalAirframeHrs,
Crew_occupants) %>%
na.omit() %>%
group_by(Operator, year) %>%
summarise(count = n(), tot_fatal = sum(Total_fatalities), tot_occu = sum(Total_occupants)) %>%
filter(tot_occu > 20, tot_fatal > 5) %>%
mutate(index = 1, index = cumsum(index), index = max(index)) %>%
filter(index > 2) %>%
select(-index) %>%
mutate(death_rate = tot_fatal*100/tot_occu)
company_year %>%
hchart(type = "line",
hcaes(x = year, y = count, group = Operator)) %>%
hc_xAxis(title = list(text = "year")) %>%
hc_yAxis(title = list(text = "Accidents Count"),
max = 22,
tickInterval = 1,
min = 0,
plotLines = list(list(color = "#FF0000",
width = 2,
value = 11,
dashStyle = 'shortdash'))) %>%
hc_title(text = "Accidents Count of an Airline in years",
style = list(fontWeight = "bold"))
۱۰. انتخاب ارزان ترین پروازها، به معنی ناامن بودن آنها است؟
برای حل این سوال، از داده ی هزینه ی پروازهای آمریکا استفاده می کنیم و میانگین هزینه ی هر خط هوایی را بدست می آوریم. سپس برای هر خط هوایی میزان امنیت را نیز با فرمولی که در سوال بعد به تفصیل توضیح داده خواهد شد، محاسبه می کنیم. در نهایت رابطه ی میان امنیت پروازها و قیمت آن ها را برای خطوط هوایی مهم آمریکا نمایش می دهیم.
airfare <- read_csv("Data/Consumer_Airfare_Report__Table_5_-_Detailed_Fare_Information_For_Highest_and_Lowest_Fare_Markets_Under_750_Miles.csv")
american_airlines <- airfare %>% select(car) %>% distinct(car) %>% arrange(car)
pattern_name = c("Tropic", "Air Plus Comet", "American Airlines", "Alaska Airlines", "JetBlue", "Continental Air Lines",
"Discovery Airways", "Delta Air Lines", "Frontier Airlines (FL)", "Tran Airways", "Allegiant Air",
"America West Airlines", "", "", "Kiwi Regional Airlines", "", "Spirit Airlines", "Northwest",
"", "", "Horizon Air", "", "", "", "Skydive Twin Cities", "", "United Airlines", "US Airways",
"American Virginia", "Pacific Western Airlines", "", "Southwest Airlines", "", "", "Mesa Airlines",
"Midwest", "Air Wisconsin")
american_airlines <- data.frame(american_airlines, pattern_name)
colnames(american_airlines ) <- c("car","Operator")
airfare <- na.omit(airfare)
airfare <- airfare %>% mutate(price = as.numeric(str_extract(mkt_fare,"\\d+")))
sum_airfare <- airfare %>% group_by(car, Year) %>% summarise(price = mean(price))
sum_airfare <- sum_airfare %>% inner_join(american_airlines, by = c("car")) %>% ungroup() %>%
group_by(Operator) %>% summarise(price = mean(price)) %>%
arrange(desc(Operator))
need = as.data.frame(c(2, 3, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 5, 3, 1, 6, 6, 0, 7, 0, 0, 0))
colnames(need) = c('need')
sum_airfare <- sum_airfare %>% bind_cols(need) %>% group_by(need) %>% summarise(price = mean(price)) %>%
ungroup() %>% filter(need != 0)
airlines_name = as.data.frame(c("Virgin Atlantic",
"Southwest Airlines",
"United / Continental*",
"Alaska Airlines*",
"American*",
"Delta / Northwest*",
"US Airways / America West*"))
colnames(airlines_name) = c("airline")
need = as.data.frame(c(1,2,3,4,5,6,7))
colnames(need) = c('need')
airline_safety_total = read_csv("Data/airline_safety.csv") %>%
mutate(total_fatal_accidents = (fatal_accidents_85_99 + fatal_accidents_00_14),
total_incidents = (incidents_85_99 + incidents_00_14)) %>%
select(airline, avail_seat_km_per_week, total_fatal_accidents, total_incidents) %>%
mutate(score = 20 - (9*total_fatal_accidents + total_incidents)*10^8/avail_seat_km_per_week)
airline_safety_total = merge(airline_safety_total, airlines_name, sort = FALSE) %>%
arrange(desc(airline)) %>%
bind_cols(need)
safety_price <- airline_safety_total %>% inner_join(sum_airfare, by = c("need"))
knitr::kable(safety_price %>% select(airline, price, safety = score))| airline | price | safety |
|---|---|---|
| Virgin Atlantic | 97.66667 | 19.90052 |
| US Airways / America West* | 244.64957 | 15.60205 |
| United / Continental* | 227.91987 | 18.27714 |
| Southwest Airlines | 134.86595 | 19.72532 |
| Delta / Northwest* | 249.27236 | 17.33360 |
| American* | 159.38471 | 17.89609 |
| Alaska Airlines* | 94.39572 | 18.03180 |
cor.test(formula = ~price + score, data = safety_price)
Pearson's product-moment correlation
data: price and score
t = -2.2164, df = 5, p-value = 0.07748
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.9522264 0.1044551
sample estimates:
cor
-0.7039785
safety_price_avg = mean(safety_price$price)
safety_price_high <- safety_price %>% filter(price > safety_price_avg)
safety_price_low <- safety_price %>% filter(price <= safety_price_avg)
wilcox.test(safety_price_high$score, safety_price_low$score, alternative = "less", exact = FALSE, correct = FALSE)
Wilcoxon rank sum test
data: safety_price_high$score and safety_price_low$score
W = 2, p-value = 0.07865
alternative hypothesis: true location shift is less than 0
همانطور که می بینیم، در تست کوریلیشن فرض استقلال قیمت پرواز و امنیت آن با دقت ۰.۱ رد می شود. هم چنین می بینیم که هزینه ی پرواز و امنیت آن همبستگی معکوس بالایی دارند و هر چه پرواز ارزانتر می شود، امنیت آن کمتر می شود. همچنین برای اطمینان ببیشتر نیز از تست wilcox rank sum test استفاده می کنیم. فرض صفر را بیشتر بودن امنیت پروازهای ارزانتر در نظر میگیریم. مشاهده می کنیم که با دقت ۰.۱ فرض صفر باطل است و پروازهای ارزانتر امنیت بیشتری از پروازهای گرانتر ندارند.
۱۱. اضافه کردن معیار امنیت به پروازها
برای این سوال به دنبال یک معیار جهت ارزیابی همه جانبهی یک پرواز با توجه به سابقهی هواپیمایی و سابقهی هواپیما و همچنین یک پرواز خاص که خود شرایط خاصی همچون مسیر و مسافت دارد بودیم و فرمول زیر را به عنوان معیاری از امنیت برای یک پرواز بدستآوردهایم.
معیار امنیت = نرخ نجات در شرکت هواپیمایی در میانگین تعداد مسافران در هر پرواز + نرخ نجات در هواپیمای موردنظر در میانگین تعداد مسافران آن هواپیما + حاصل ضرب نرخ نجات در هواپیما و شرکت هواپیمایی در تعداد مسافر هواپیمای مذکور
بعد از اعمال این معیار نتیجه کسب بالاترین امتیاز توسط هواپیماهای برتر در خطوط هوایی عالی در جهان شد که مناسب بودن معیار را نشان میدهد. نمونهای از این امتیازدهی را در زیر مشاهده میکنید.
# find safety for flights
casn_flight_safety = casn_topics %>% filter(!is.na(Type) & !is.na(Operator) & Total_occupants != 0) %>%
group_by(Type) %>%
mutate(airplane_total_occupants = sum(Total_occupants), airplane_total_fatalities = sum(Total_fatalities),
airplane_total_survivors = sum(Total_survivors), airplane_survival_rate = airplane_total_survivors/airplane_total_occupants,
airplane_occurance = n(), airplane_mean_occupants = mean(Total_occupants)) %>%
ungroup() %>%
group_by(Operator) %>%
mutate(airline_total_occupants = sum(Total_occupants), airline_total_fatalities = sum(Total_fatalities),
airline_total_survivors = sum(Total_survivors), airline_survival_rate = airline_total_survivors/airline_total_occupants,
airline_occurance = n(), airline_mean_occupants = mean(Total_occupants)) %>%
ungroup() %>%
mutate(score = (airplane_survival_rate*airplane_mean_occupants) + (airline_survival_rate*airline_mean_occupants) + (airplane_survival_rate*airline_survival_rate*Total_occupants)) %>%
group_by(Operator, Type) %>%
summarise(flight_safety_indicator = mean(score))
knitr::kable(head(casn_flight_safety))| Operator | Type | flight_safety_indicator |
|---|---|---|
| 2nd Arkhangelsk United Aviation Division | Antonov 2 | 17.69351 |
| 2nd Sverdlovsk Aviation Enterprise | Antonov 2R | 21.62431 |
| 748 Air Services | Antonov 12BP | 19.07187 |
| 748 Air Services | British Aerospace BAe-748-398 Srs. 2B | 95.75000 |
| 748 Air Services | Hawker Siddeley HS-748-206 Andover CC2 | 17.75000 |
| 748 Air Services | Hawker Siddeley HS-780 Andover C.1 | 19.75000 |
۱۲. رده بندی پروازها بر اساس امنیت
از دادههای سوال قبل بهرهبرده و در جدولی برترین پروازهای خطوط هواپیمایی را نشان میدهیم.
top_20_flight = casn_flight_safety %>%
ungroup() %>%
arrange(-flight_safety_indicator) %>%
slice(1:20)
knitr::kable(top_20_flight)| Operator | Type | flight_safety_indicator |
|---|---|---|
| Air Atlanta Icelandic | Boeing 747-267B | 1338.0000 |
| Mahan Air | Boeing 747-3B3 | 1089.0000 |
| Qantas | Airbus A380-842 | 1062.0357 |
| All Nippon Airways - ANA | Boeing 747-481D | 1055.8114 |
| Tower Air | Boeing 747-136 | 966.0000 |
| Air France | Airbus A380-861 | 951.4491 |
| Qantas | Boeing 747-438 | 900.3825 |
| Emirates | Boeing 777-31H | 887.5000 |
| Air Transat | Airbus A330-243 | 870.5000 |
| Emirates | Airbus A340-541 | 837.5000 |
| Thomson Airways | Boeing 767-324ER (WL) | 810.0000 |
| All Nippon Airways - ANA | Boeing 747SR-81 | 776.8594 |
| Japan Air Lines - JAL | McDonnell Douglas DC-10-40 | 770.1272 |
| Korean Air | Boeing 747-4B5 | 769.7858 |
| United Arab Emirates - Amiri Flight | Airbus A300C4-620 | 755.7454 |
| WestJet | Boeing 767-338ER (WL) | 747.0000 |
| Ansett Australia Airlines | Boeing 747-312 | 737.5000 |
| Air France | Boeing 747-128 | 730.7760 |
| British Airways | Boeing 747-436 | 720.3829 |
| Qantas | Airbus A330-303 | 716.5805 |
۱۳. تاثیر وقوع سانحه ی هوایی بر روی قیمت بلیت های آن ایرلاین
برای این سوال ابتدا داده ی قیمت پروازهای آمریکا را با داده ی حوادث ادغام کرده ایم. برای این منظور میانگین قیمت بلیت را به طور سالانه و برای هر ایرلاین بدست آورده و آن را برای رخدادهای امنیتی پروازهای آن ایرلاین در آن سال قرار می دهیم.
همانطور که مشاهده میکنید برای ۳ ایرلاین حادثه خیز در آمریکا اطلاعات را جمع آوری کرده و میزان قیمت و حوادث آن ها را کشیده ایم که به وضوح مشخص است هیچ ربط خاصی بین این دو مقدار وجود ندارد که منطقی نیز به نظر می رسد و هرشرکت هواپیمایی با توجه به هزینه های خود قیمت گذاری را انجام می دهد و در صورت بروز سانحه نیز دلیلی بر کاهش هزینه های آن شرکت دیده نمی شود بلکه خسارات و پیشگیری ها هزینه های بیشتری را متحمل این شرکت ها می کند که تست ارتباط نیز هیچ ارتباطی را بین این دو متغیر تایید نمی کند.
library(reshape2)
sum_airfare <- airfare %>% group_by(car, Year) %>% summarise(price = mean(price))
sum_airfare <- sum_airfare %>% inner_join(american_airlines, by = c("car")) %>% ungroup()
american_crashes <- casn_topics %>% ungroup() %>%
inner_join(american_airlines, by = c("Operator"))
sum_american_crashes <- american_crashes %>% group_by(Operator, Date) %>%
summarise(Total_occupants = sum(Total_occupants),
Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors),
occurance = n()) %>%
mutate(Survival_rate = ifelse(Total_occupants < 1, 0, 100*Total_survivors/Total_occupants),
Fatality_rate = ifelse(Total_occupants < 1, 0, 100*Total_fatalities/Total_occupants)) %>%
ungroup()
Total_occupants_avg <- mean(sum_american_crashes$Total_occupants)
sum_american_crashes <- sum_american_crashes %>%
mutate(safety = ifelse(Total_occupants < 1, 0.1, Survival_rate - Total_occupants_avg/(occurance*Total_occupants)))
crash_price <- sum_airfare %>% inner_join(sum_american_crashes, by = c("Operator", "Year" = "Date"))
crash_efect = crash_price %>%
select(year = Year, airline = Operator, price, occurance) %>%
filter(airline == "American Airlines" |
airline == "United Airlines" |
airline == "Delta Air Lines")
ggplot(crash_efect, aes(x = year, y = occurance , group = airline, fill = airline)) +
geom_bar(stat="identity",position="dodge") + ggtitle("Airline Safety Occurance Yearly")ggplot(crash_efect, aes(x = year, y = price , group = airline, fill = airline)) +
geom_bar(stat="identity",position="dodge") + ggtitle("Airline Average Price Yearly")cor.test(crash_efect$price, crash_efect$occurance)
Pearson's product-moment correlation
data: crash_efect$price and crash_efect$occurance
t = 0.23624, df = 32, p-value = 0.8148
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.3006828 0.3746055
sample estimates:
cor
0.04172557
۱۴. تاثیر تحریم ها بر روی سوانح هوایی ایران و تحلیل سوالات فوق برای ایران
برای حل این سوال، خطوط هوایی ایران را یافته و داده ی مربوط به ایران را بدست می آوریم. سپس رخدادهای امنیتی را به دو دسته ی با تلفات و بدون آن تقسیم می کنیم و نمودار آن را در طول زمان می کشیم.
iran_casn <- casn_topics %>% filter(str_detect(Operator, "Iran")) %>%
bind_rows(casn_topics %>% filter(str_detect(Operator, "Qeshm")))
rest_Iranian_operators = as.data.frame(c("Mahan Air", "Zagros Air", "Kish Air", "Taban Air", "Caspian Airlines",
"Saha Air"))
colnames(rest_Iranian_operators) = c("Operator")
iran_casn <- iran_casn %>%
bind_rows(casn %>% inner_join(rest_Iranian_operators, by = c("Operator")))
iran_casn <- iran_casn %>%
mutate(kind = ifelse(Total_fatalities > 0, "Crash", "Incident"))
# a: sanctions effect
iran_sum <- iran_casn %>% group_by(Date, kind) %>%
summarise(count = n(), fatalities = sum(Total_fatalities))
iran_sum <- iran_casn %>% ungroup() %>% group_by(Date) %>%
summarise(count = n(), fatalities = sum(Total_fatalities)) %>%
mutate(kind = "All") %>% bind_rows(iran_sum)
iran_sum %>%
hchart(type = "column", hcaes(x = Date, y = count, group = kind)) %>%
hc_yAxis(title = list(text = "Count")) %>%
hc_xAxis(title = list(text = "Year"),
plotLines = list(list(color = "#FF0000", width = 2, value = 1978, dashStyle = 'shortdash',
label = list(text = "Iran Revolution - Sanctions")))) %>%
hc_title(text = "Iran Safety Occurance", style = list(fontWeight = "bold")) %>%
hc_add_theme(hc_theme_flat())iran_before <- iran_sum %>% filter(Date < 1978 & kind == "All")
iran_after <- iran_sum %>% filter(Date >= 1978 & kind == "All")
t.test(iran_before$count, iran_after$count, alternative = "less")
Welch Two Sample t-test
data: iran_before$count and iran_after$count
t = -2.2802, df = 47.773, p-value = 0.01355
alternative hypothesis: true difference in means is less than 0
95 percent confidence interval:
-Inf -0.1645028
sample estimates:
mean of x mean of y
1.600000 2.222222
همانطور که می بینیم، همه ی انواع رخدادهای امنیتی پس از انقلاب افزایش یافته است و تحریم ها موثر بوده است.
برای تست تاثیر تحریم ها، فرض صفر را کمتر شدن رخدادهای امنیتی پس از انقلاب در نظر می گیریم. همانطور که نتیجه نشان می دهد، این احتمال بسیار کم بوده و فرض صفر رد می شود. پس تحریم ها تاثیر داشته و رخدادهای امنیتی پس از انقلاب بسیار بیشتر از پیش از آن است.
بدترین خطوط هوایی، پروازها، فرودگاه های مقصد
# 6
# worst airline
worst_airline <- iran_casn %>% filter(!is.na(Operator)) %>%
filter(is_army == FALSE) %>%
group_by(Operator) %>%
summarise(Total_occupants = sum(Total_occupants), Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors), Survival_rate = 100*Total_survivors/Total_occupants) %>%
ungroup() %>%
filter(Survival_rate != 0) %>%
top_n(5, wt = desc(Survival_rate)) %>%
arrange(Survival_rate)
p = ggplot(data = worst_airline, mapping = aes(x = reorder(Operator, Survival_rate), y = Survival_rate, fill = Total_fatalities)) +
geom_bar(stat="identity") + scale_fill_gradient(low="brown1", high="brown4") +
ggtitle("Worst Iranian Airlines with lowest survival rate") +
xlab("Airline") +
ylab("Survival rate") + guides(color=guide_legend(title="fatality"), fill=guide_legend(title="fatality")) +
coord_flip()
p# worst airplane
worst_airplane <- iran_casn %>% filter(!is.na(Type)) %>%
filter(is_army == FALSE) %>%
group_by(Type) %>%
summarise(Total_occupants = sum(Total_occupants), Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors), Survival_rate = 100*Total_survivors/Total_occupants) %>%
ungroup() %>%
filter(Survival_rate != 0) %>%
top_n(5, wt = desc(Survival_rate)) %>%
arrange(Survival_rate)
p = ggplot(data = worst_airplane, mapping = aes(x = reorder(Type, Survival_rate), y = Survival_rate, fill = Total_fatalities)) +
geom_bar(stat="identity") +
ggtitle("Worst Iranian Airplanes with lowest survival rate") +
xlab("Airplane") +
ylab("Survival rate") + guides(color=guide_legend(title="fatality"), fill=guide_legend(title="fatality")) +
coord_flip()
p# worst route
worst_departure_airport <- iran_casn %>% filter(!is.na(DepartureAirport)) %>%
filter(is_army == FALSE) %>%
group_by(DepartureAirport) %>%
summarise(Total_occupants = sum(Total_occupants), Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors), Survival_rate = 100*Total_survivors/Total_occupants) %>%
ungroup() %>%
filter(Survival_rate != 0) %>%
top_n(5, wt = desc(Survival_rate)) %>%
arrange(Survival_rate)
p = ggplot(data = worst_departure_airport, mapping = aes(x = reorder(DepartureAirport, Survival_rate), y = Survival_rate, fill = Total_fatalities)) +
geom_bar(stat="identity") + scale_fill_gradient(low="midnightblue", high="darkred") +
ggtitle("Worst Iranian Departure Airports with lowest survival rate") +
xlab("Departure Airport") +
ylab("Survival rate") + guides(color=guide_legend(title="fatality"), fill=guide_legend(title="fatality")) +
coord_flip()
pمیزان تلفات هوایی ایران در سال ها
# 7
# army and civil flights
year_fat <- iran_casn %>% filter(!is.na(Date)) %>% group_by(Date) %>%
summarise(Total_occupants = sum(Total_occupants), Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors), Survival_rate = 100*Total_survivors/Total_occupants)
highchart() %>%
hc_add_series(data = year_fat, type = "spline", hcaes(x = Date, y = Total_fatalities), name = "Total Fatalities") %>%
hc_add_series(data = year_fat, type = "spline", hcaes(x = Date, y = Total_survivors), name = "Total Survivors") %>%
hc_yAxis(title = list(text = "Count")) %>%
hc_xAxis(title = list(text = "Year")) %>%
hc_title(text = "Iran Fatalities Per Year", style = list(fontWeight = "bold")) %>%
hc_add_theme(hc_theme_flat())میزان تلفات علل وقوع حوادث امنیتی در ایران
# 8
iran_casn_topics_sum <- iran_casn %>% group_by(topic) %>%
summarise(Total_occupants = sum(Total_occupants),
Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors)) %>%
mutate(Survival_rate = 100*Total_survivors/Total_occupants, Fatality_rate = 100*Total_fatalities/Total_occupants) %>%
inner_join(top_terms_merge, by = c("topic"))
iran_casn_topics_sum %>% arrange(desc(Fatality_rate)) %>%
hchart(type = "column", hcaes(x = words ,y = Fatality_rate, color = Total_occupants)) %>%
hc_yAxis(title = list(text = "Fatality Rate")) %>%
hc_xAxis(title = list(text = "Topic")) %>%
hc_title(text = "Iran Safety Occurence Topics Fatality Rate", style = list(fontWeight = "bold")) %>%
hc_add_theme(hc_theme_google())iran_casn_topics_yearly <- iran_casn %>% group_by(topic, Date) %>%
summarise(Total_occupants = sum(Total_occupants),
Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors)) %>%
mutate(Survival_rate = 100*Total_survivors/Total_occupants, Fatality_rate = 100*Total_fatalities/Total_occupants) %>%
inner_join(top_terms_merge, by = c("topic")) %>%
mutate(Survival_rate = ifelse(is.na(Survival_rate), 0, Survival_rate),
Fatality_rate = ifelse(is.na(Fatality_rate), 0, Fatality_rate))
iran_casn_topics_yearly %>% arrange(Date) %>%
hchart("heatmap", hcaes(x = Date, y = words,value = Fatality_rate)) %>%
hc_title(text = "Iran Safety Occurence Topics Fatality Rate in Years", style = list(fontWeight = "bold"))حوادثی که برای هواپیماهای ایران بیشتر رخ می دهند
iran_casn_topic_air <- iran_casn %>% group_by(Type, topic) %>%
summarise(Total_occupants = sum(Total_occupants),
Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors),
occ = n()) %>%
mutate(Survival_rate = 100*Total_survivors/Total_occupants, Fatality_rate = 100*Total_fatalities/Total_occupants) %>%
inner_join(top_terms_merge, by = c("topic")) %>%
mutate(Survival_rate = ifelse(is.na(Survival_rate), 0, Survival_rate),
Fatality_rate = ifelse(is.na(Fatality_rate), 0, Fatality_rate)) %>%
ungroup() %>%
group_by(Type) %>%
arrange(desc(occ)) %>%
slice(1) %>%
select(Aircraft = Type, Cause = words, occurance = occ, Fatality_rate, Total_occupants, Total_fatalities) %>%
arrange(desc(Total_fatalities))
knitr::kable(iran_casn_topic_air)| Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|
| Airbus A300B2-203 | hijacker, demanded, hijackers, passengers, 1 | 4 | 58.467742 | 496 | 290 |
| Tupolev Tu-154M | en, route, night, operation, NA | 1 | 100.000000 | 131 | 131 |
| Boeing 727-286 | hijacker, demanded, hijackers, passengers, 1 | 1 | 74.285714 | 105 | 78 |
| ATR 72-212 | hijacker, demanded, hijackers, passengers, 1 | 1 | 100.000000 | 66 | 66 |
| Fokker F-28 Fellowship 1000 | dc, 3, struck, 4, douglas | 1 | 100.000000 | 66 | 66 |
| Antonov 74T-200 | runway, feet, short, left, rest | 1 | 97.368421 | 38 | 37 |
| Lockheed C-130H Hercules | sea, missing, water, hit, hangar | 1 | 100.000000 | 32 | 32 |
| Yakovlev 40 | hijacker, demanded, hijackers, passengers, 1 | 1 | 100.000000 | 30 | 30 |
| Tupolev 154M | landing, gear, main, collapsed, undercarriage | 1 | 18.918919 | 148 | 28 |
| Douglas C-47A-35-DL (DC-3) | accident, destroyed, killed, airport, crew | 1 | 96.000000 | 25 | 24 |
| Boeing 747-131F | feet, departed, crew, reported, cleared | 1 | 100.000000 | 17 | 17 |
| Lockheed L-1329-25 JetStar II | pilot, crew, test, cabin, system | 1 | 100.000000 | 12 | 12 |
| Douglas C-47A-30-DL (DC-3) | hijacker, demanded, hijackers, passengers, 1 | 1 | 100.000000 | 9 | 9 |
| Douglas C-47A-25-DK (DC-3) | crashed, mountain, 2, km, antonov | 2 | 100.000000 | 8 | 8 |
| Ilyushin 76MD Simorgh | engine, takeoff, landing, forced, lost | 1 | 100.000000 | 7 | 7 |
| Boeing 727 | hijacker, demanded, hijackers, passengers, 1 | 6 | 1.582278 | 316 | 5 |
| Fokker F-27 Friendship 200 | crashed, mountain, 2, km, antonov | 1 | 100.000000 | 4 | 4 |
| Boeing 737-286 | hijacker, demanded, hijackers, passengers, 1 | 1 | 100.000000 | 3 | 3 |
| de Havilland Canada DHC-6 Twin Otter 300 | approach, weather, pilot, conditions, visibility | 1 | 100.000000 | 3 | 3 |
| Douglas C-47 (DC-3) | crashed, mountain, 2, km, antonov | 1 | 100.000000 | 1 | 1 |
| unknown jetliner | hijacker, demanded, hijackers, passengers, 1 | 1 | 100.000000 | 1 | 1 |
| Airbus A300B2-202 | hijacker, demanded, hijackers, passengers, 1 | 1 | 0.000000 | 315 | 0 |
| Boeing 707-300 | hijacker, demanded, hijackers, passengers, 1 | 1 | 0.000000 | 0 | 0 |
| Boeing 727-86 | pilot, crew, test, cabin, system | 1 | 0.000000 | 2 | 0 |
| Boeing 747 | hijacker, demanded, hijackers, passengers, 1 | 2 | 0.000000 | 0 | 0 |
| Boeing 747SP-86 | landing, gear, main, collapsed, undercarriage | 1 | 0.000000 | 180 | 0 |
| Dassault Falcon 20E | crashed, mountain, 2, km, antonov | 1 | 0.000000 | 0 | 0 |
| Dassault Falcon 20F | damaged, repair, accident, reportedly, raf | 2 | 0.000000 | 0 | 0 |
| Douglas C-47-DL (DC-3) | engine, fuel, wing, left, takeoff | 1 | 0.000000 | 2 | 0 |
| Douglas C-47A-20-DK (DC-3) | engine, takeoff, landing, forced, lost | 1 | 0.000000 | 6 | 0 |
| Douglas C-47A-20-DL (DC-3) | engine, takeoff, landing, forced, lost | 1 | 0.000000 | 0 | 0 |
| Douglas C-47A-70-DL (DC-3) | engine, takeoff, landing, forced, lost | 1 | 0.000000 | 28 | 0 |
| Douglas C-47B-20-DK (DC-3) | fire, caught, ground, destroyed, de | 1 | 0.000000 | 0 | 0 |
| Douglas DC-4 | engine, takeoff, landing, forced, lost | 1 | 0.000000 | 3 | 0 |
| Douglas R4D-4 (DC-3) | crashed, mountain, 2, km, antonov | 1 | 0.000000 | 0 | 0 |
| Fokker 100 | hijacker, demanded, hijackers, passengers, 1 | 4 | 0.000000 | 226 | 0 |
| Fokker F-27 Friendship 300 | hijacker, demanded, hijackers, passengers, 1 | 1 | 0.000000 | 0 | 0 |
| Fokker F-27 Friendship 400M | crashed, mountain, 2, km, antonov | 1 | 0.000000 | 0 | 0 |
| Fokker F-27 Friendship 600 | runway, feet, short, left, rest | 1 | 0.000000 | 50 | 0 |
| Fokker F-28 Fellowship 4000 | approach, weather, pilot, conditions, visibility | 1 | 0.000000 | 88 | 0 |
| Ilyushin 76MD | engine, fuel, wing, left, takeoff | 1 | 0.000000 | 0 | 0 |
| Lockheed C-130 Hercules | air, force, transport, base, flying | 2 | 0.000000 | 0 | 0 |
| Lockheed C-130E Hercules | crashed, mountain, 2, km, antonov | 2 | 0.000000 | 0 | 0 |
| Lockheed EC-130E Hercules | air, force, transport, base, flying | 1 | 0.000000 | 0 | 0 |
| Lockheed P-3F Orion | en, route, night, operation, NA | 1 | 0.000000 | 0 | 0 |
| Vickers 782D Viscount | landing, gear, main, collapsed, undercarriage | 1 | 0.000000 | 0 | 0 |
| NA | hijacker, demanded, hijackers, passengers, 1 | 5 | 0.000000 | 0 | 0 |
چه هواپیماهایی در ایران بیشتر دچار هر دسته ی حوادث می شوند
iran_casn_topic_air <- iran_casn %>% group_by(topic, Type) %>%
summarise(Total_occupants = sum(Total_occupants),
Total_fatalities = sum(Total_fatalities),
Total_survivors = sum(Total_survivors),
occ = n()) %>%
mutate(Survival_rate = 100*Total_survivors/Total_occupants, Fatality_rate = 100*Total_fatalities/Total_occupants) %>%
inner_join(top_terms_merge, by = c("topic")) %>%
mutate(Survival_rate = ifelse(is.na(Survival_rate), 0, Survival_rate),
Fatality_rate = ifelse(is.na(Fatality_rate), 0, Fatality_rate)) %>%
ungroup() %>%
filter(Survival_rate < 100) %>%
group_by(topic) %>%
arrange(desc(occ)) %>%
slice(1:3) %>%
select(Aircraft = Type, Cause = words, occurance = occ, Fatality_rate, Total_occupants, Total_fatalities) %>%
arrange(desc(Total_fatalities))
knitr::kable(iran_casn_topic_air %>% filter(topic == 1))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 1 | Tupolev 154M | landing, gear, main, collapsed, undercarriage | 1 | 18.91892 | 148 | 28 |
| 1 | Fokker 100 | landing, gear, main, collapsed, undercarriage | 2 | 0.00000 | 0 | 0 |
| 1 | Vickers 782D Viscount | landing, gear, main, collapsed, undercarriage | 1 | 0.00000 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 2))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 2 | Lockheed C-130H Hercules | sea, missing, water, hit, hangar | 1 | 100 | 32 | 32 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 3))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 3 | Antonov 74T-200 | runway, feet, short, left, rest | 1 | 97.36842 | 38 | 37 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 4))topic Aircraft Cause occurance Fatality_rate Total_occupants Total_fatalities —— ——— —— ———- ————– —————- —————–
knitr::kable(iran_casn_topic_air %>% filter(topic == 5))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 5 | Ilyushin 76MD | engine, fuel, wing, left, takeoff | 1 | 0 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 6))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 6 | Lockheed L-1329-25 JetStar II | pilot, crew, test, cabin, system | 1 | 100 | 12 | 12 |
| 6 | Antonov 74T-200 | pilot, crew, test, cabin, system | 1 | 0 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 7))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 7 | Airbus A300B2-203 | hijacker, demanded, hijackers, passengers, 1 | 4 | 58.467742 | 496 | 290 |
| 7 | Boeing 727 | hijacker, demanded, hijackers, passengers, 1 | 6 | 1.582278 | 316 | 5 |
| 7 | NA | hijacker, demanded, hijackers, passengers, 1 | 5 | 0.000000 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 8))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 8 | Tupolev 154M | runway, captain, feet, approach, pilot | 1 | 100 | 119 | 119 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 9))topic Aircraft Cause occurance Fatality_rate Total_occupants Total_fatalities —— ——— —— ———- ————– —————- —————–
knitr::kable(iran_casn_topic_air %>% filter(topic == 10))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 10 | Douglas C-47A-25-DK (DC-3) | crashed, mountain, 2, km, antonov | 2 | 100 | 8 | 8 |
| 10 | Lockheed C-130E Hercules | crashed, mountain, 2, km, antonov | 2 | 0 | 0 | 0 |
| 10 | Dassault Falcon 20E | crashed, mountain, 2, km, antonov | 1 | 0 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 11))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 11 | Boeing 727-86 | accident, destroyed, killed, airport, crew | 1 | 100 | 128 | 128 |
| 11 | Douglas C-47A-35-DL (DC-3) | accident, destroyed, killed, airport, crew | 1 | 96 | 25 | 24 |
| 11 | Lockheed C-130H Hercules | accident, destroyed, killed, airport, crew | 1 | 0 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 12))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 12 | Douglas C-47B-20-DK (DC-3) | fire, caught, ground, destroyed, de | 1 | 0 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 13))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 13 | Boeing 747-131F | feet, departed, crew, reported, cleared | 1 | 100 | 17 | 17 |
| 13 | Lockheed C-130 Hercules | feet, departed, crew, reported, cleared | 1 | 100 | 7 | 7 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 14))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 14 | Ilyushin 76MD Simorgh | engine, takeoff, landing, forced, lost | 1 | 100 | 7 | 7 |
| 14 | Dassault Falcon 20E | engine, takeoff, landing, forced, lost | 1 | 0 | 0 | 0 |
| 14 | Douglas C-47A-20-DL (DC-3) | engine, takeoff, landing, forced, lost | 1 | 0 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 15))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 15 | Dassault Falcon 20F | damaged, repair, accident, reportedly, raf | 2 | 0 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 16))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 16 | Boeing 727-286 | airport, boeing, international, air, otter | 1 | 57.14286 | 7 | 4 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 17))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 17 | Fokker F-28 Fellowship 1000 | dc, 3, struck, 4, douglas | 1 | 100 | 66 | 66 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 18))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 18 | Tupolev Tu-154M | en, route, night, operation, NA | 1 | 100 | 131 | 131 |
| 18 | Douglas C-47 (DC-3) | en, route, night, operation, NA | 1 | 0 | 0 | 0 |
| 18 | Lockheed P-3F Orion | en, route, night, operation, NA | 1 | 0 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 19))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 19 | Lockheed C-130 Hercules | air, force, transport, base, flying | 2 | 0 | 0 | 0 |
| 19 | Airbus A300B2-203 | air, force, transport, base, flying | 1 | 0 | 0 | 0 |
| 19 | Lockheed C-130E Hercules | air, force, transport, base, flying | 1 | 0 | 0 | 0 |
knitr::kable(iran_casn_topic_air %>% filter(topic == 20))| topic | Aircraft | Cause | occurance | Fatality_rate | Total_occupants | Total_fatalities |
|---|---|---|---|---|---|---|
| 20 | Lockheed C-130 Hercules | approach, weather, pilot, conditions, visibility | 1 | 100 | 103 | 103 |
| 20 | de Havilland Canada DHC-6 Twin Otter 300 | approach, weather, pilot, conditions, visibility | 1 | 100 | 3 | 3 |
| 20 | Lockheed C-130H Hercules | approach, weather, pilot, conditions, visibility | 1 | 0 | 0 | 0 |